In [4]:
import numpy as np
from scipy.sparse import coo_matrix
import sqlite3
# Connect to the SQLite database file; the relative path is resolved against
# the current working directory.
conn = sqlite3.connect("mydatabase.db") 
# Module-level cursor shared by get_data() and the later query cells.
cursor = conn.cursor()
In [5]:
def get_data(start=0,count = 10000):
    """Build a sparse interaction matrix from the ``cross`` table.

    Uses the module-level ``cursor``. The first column of every fetched row
    is dropped; the next two columns become the row and column indices of
    the matrix (presumably ``playlist_id`` and ``song_id`` -- the table
    schema is not visible here, confirm against the database).

    Parameters
    ----------
    start : int, optional
        Currently unused; kept so existing callers keep working.
    count : int, optional
        Maximum number of rows read from ``cross``; also the exclusive upper
        bound on ``id`` in the two COUNT queries that size the matrix.

    Returns
    -------
    scipy.sparse.coo_matrix
        Matrix holding 1.0 for every fetched (row, col) pair, with duplicate
        pairs summed into a single entry.
    """
    # Bind ``count`` as a query parameter instead of formatting it into the SQL.
    cursor.execute("SELECT * FROM cross limit ?", (count,))
    pairs = np.array(cursor.fetchall())[:, 1:]
    row = pairs[:, 0]
    col = pairs[:, 1]
    data = np.ones(pairs.shape[0])

    cursor.execute("SELECT COUNT(song_id) FROM cross WHERE id < ?", (count,))
    num_col = cursor.fetchall()[0][0]
    cursor.execute("SELECT COUNT(playlist_id) FROM cross WHERE id < ?", (count,))
    # One extra row on top of the counted rows, as the original code reserved.
    num_row = cursor.fetchall()[0][0] + 1

    # The values above are row counts, not index bounds. Grow the shape when an
    # index would not fit, so coo_matrix does not raise "index exceeds matrix
    # dimensions"; whenever the counts already suffice the shape is unchanged.
    num_row = max(num_row, int(row.max()) + 1)
    num_col = max(num_col, int(col.max()) + 1)

    interactions = coo_matrix((data, (row, col)), shape=(num_row, num_col))
    # Collapse repeated (row, col) pairs in place by summing their values.
    interactions.sum_duplicates()
    return interactions
In [6]:
# Build the interaction matrix using get_data's defaults (count=10000).
cross_tab = get_data()
In [7]:
# from sklearn.model_selection import train_test_split
from lightfm.cross_validation import random_train_test_split

# Seed the shuffle so the 70/30 split, and every metric computed from it, is
# the same after a kernel restart. lightfm expects a RandomState instance here.
train, test = random_train_test_split(
    cross_tab,
    test_percentage=0.3,
    random_state=np.random.RandomState(42),
)
C:\Users\HeavyChevy\anaconda3\lib\site-packages\lightfm\_lightfm_fast.py:9: UserWarning: LightFM was compiled without OpenMP support. Only a single thread will be used.
  warnings.warn(
In [8]:
from lightfm import LightFM
from lightfm.evaluation import precision_at_k
from lightfm.evaluation import auc_score

# WARP-loss model with 16 latent components. A fixed random_state seeds the
# model's random number generator so repeated runs start from the same state.
model = LightFM(learning_rate=0.05, loss='warp', no_components=16, random_state=42)
model.fit_partial(train, epochs=15)

# For the test metrics, train_interactions makes lightfm leave known training
# positives out of the rankings.
train_precision = precision_at_k(model, train, k=10).mean()
test_precision = precision_at_k(model, test, k=10, train_interactions=train).mean()

train_auc = auc_score(model, train).mean()
test_auc = auc_score(model, test, train_interactions=train).mean()

print('Precision: train %.2f, test %.2f.' % (train_precision, test_precision))
print('AUC: train %.2f, test %.2f.' % (train_auc, test_auc))
Precision: train 0.89, test 0.01.
AUC: train 1.00, test 0.42.
In [9]:
# Keep references to the latent factor arrays of the fitted model.
item_embeddings = model.item_embeddings
user_embeddings = model.user_embeddings
In [10]:
# Persist the item embeddings; np.save opens and closes the file itself.
np.save('item_embeddings.npy', item_embeddings)
In [11]:
# Persist the user embeddings; np.save opens and closes the file itself.
np.save('user_embeddings.npy', user_embeddings)
In [12]:
# The 100 artist names with the most rows in `songs`, restricted to id < 100000.
# fetchall() yields 1-tuples, so each entry of `artists` holds the name at index 0.
# NOTE(review): the song-selection cell below filters on id < 10000, not
# 100000 -- confirm which bound is intended.
cursor.execute("SELECT artist_name FROM songs WHERE id < 100000 GROUP BY artist_name ORDER BY COUNT(*) DESC LIMIT 100;")
artists = np.array(cursor.fetchall())
# artists
In [31]:
selected_songs = []  # 1-tuples (song_id,) exactly as returned by fetchall()
res_artist = []      # artist name for each entry of selected_songs, same order
un_art = {}          # dict used as an insertion-ordered set of artist names

for art in artists:
    artist_name = art[0]
    # Bind the name as a parameter: formatting it into the SQL text breaks on
    # names containing an apostrophe and allows SQL injection.
    cursor.execute(
        "SELECT id FROM songs WHERE (artist_name = ?) and (id < 10000)",
        (str(artist_name),),
    )
    song_ids = cursor.fetchall()
    selected_songs += song_ids
    res_artist += [artist_name] * len(song_ids)
    un_art[artist_name] = 1
    
# selected_songs
In [17]:
# Load the array stored in item_embeddings.npy from the working directory.
emb = np.load('item_embeddings.npy')
In [18]:
res_pts = []
# One embedding lookup per selected song, in the same order as selected_songs.
res_songs = [emb[song_key] for song_key in selected_songs]
# res_songs
In [19]:
from sklearn.manifold import TSNE

# t-SNE is stochastic; a fixed random_state keeps the 2-D layout (and the
# plots built from it) the same between runs.
X_embedded = TSNE(n_components=2, random_state=42).fit_transform(res_songs)
X_embedded.shape
Out[19]:
(1473, 2)
In [20]:
# Persist the 2-D t-SNE coordinates; np.save opens and closes the file itself.
np.save('TSNE.npy', X_embedded)
In [21]:
import pandas as pd

# Columns 0 and 1 hold the two t-SNE coordinates; 'artist' labels each point.
df = pd.DataFrame(X_embedded)
df['artist'] = res_artist
# Assign the result back instead of calling replace(..., inplace=True) on the
# column selection: under pandas Copy-on-Write that chained in-place call
# modifies a temporary object and leaves df unchanged.
df['artist'] = df['artist'].replace('', 'Unknown')
In [22]:
import plotly.express as px
# Interactive scatter of the two embedding columns, one colour per artist label.
fig = px.scatter(df, x=0, y=1, color="artist")
fig.show()
In [45]:
# Map each artist name to its position in un_art once, instead of rebuilding
# list(un_art.keys()) and scanning it for every row.
artist_index = {name: idx for idx, name in enumerate(un_art)}
# Labels that are not keys of un_art get -1. An explicit default replaces the
# bare `except:`, which would also have hidden unrelated errors.
col = [artist_index.get(name, -1) for name in df['artist']]
In [46]:
import matplotlib.pyplot as plt
# Static scatter of the two embedding columns, coloured by the integer codes in col.
plt.scatter(df[0],df[1],c = col)
plt.show()
In [56]:
# Predict a score for user 1 against every item column of the training matrix.
user_id = 1
n_users, n_items = train.shape
scores = model.predict(user_id, np.arange(n_items))
scores
Out[56]:
array([ 0.3914338 ,  0.5463036 ,  0.5903043 , ..., -0.4615834 ,
       -0.4641547 , -0.50609154], dtype=float32)
In [ ]: